In this version, I aggregate and explore the policy data to build an index. To do this, I use the COINr package.
# Libraries: pacman::p_load() installs (if missing) and attaches all packages.
# COINr = composite-indicator toolkit; ggbiplot = PCA biplots;
# countrycode = country name/ISO conversions; reactable = interactive tables.
pacman::p_load(COINr, ggbiplot, countrycode, reactable, tidyverse)
#Initialisation https://bluefoxr.github.io/COINrDoc/coins-the-currency-of-coinr.html
# Read the policy-database export.
# NOTE(review): hard-coded, user-specific absolute path — consider a relative
# path or here::here() for portability.
policydb <- read_csv("C:\\Users\\JRMA\\OneDrive - Folkehelseinstituttet\\Dokumenter\\CO-CREATE\\policy-export10-Dec-2021.csv", show_col_types = FALSE)
# Positional rename: assumes the export's column order is stable — TODO confirm.
colnames(policydb) <- c("DB_type", "PolicyArea", "SubPArea", "PolicyAction", "Country", "Topics", "BenchmarkID", "PolicyAreaID", "PolicyDim")
# Drop columns 1 and 6 (DB_type, Topics) by position.
policydb <- policydb[-c(1,6)]
# One row per Country x PolicyAreaID x BenchmarkID, with occurrence count n.
country_hierch2 <- policydb %>% count(Country, PolicyAreaID, BenchmarkID)
country_hierch2
## # A tibble: 800 x 4
## Country PolicyAreaID BenchmarkID n
## <chr> <chr> <chr> <int>
## 1 Afghanistan I(2) I(2)1 1
## 2 Albania I(2) I(2)1 1
## 3 American Samoa H H7 1
## 4 Antigua and Barbuda H H7 1
## 5 Antigua and Barbuda I(2) I(2)1 1
## 6 Antigua and Barbuda I(2) I(2)3 1
## 7 Argentina I I1 1
## 8 Argentina I I3 1
## 9 Argentina I I4 1
## 10 Argentina I(2) I(2)1 1
## # ... with 790 more rows
# Country > PArea > SubPA > Count
# Flag whether a policy action (benchmark) is implemented for a country.
# count() only emits rows with n >= 1 and n is never NA, so wrapping the
# comparison in if_else(..., TRUE, FALSE, missing = FALSE) was redundant —
# the logical expression itself is enough.
truef_spa <- country_hierch2 %>%
  mutate(PolicyActionImplemented = n > 0)
# One row per country, one column per benchmark (BenchmarkID):
# 1 = the benchmark appears for the country in the policy DB, 0 = it does not.
# NOTE(review): the original selected PolicyActionImplemented and then
# immediately dropped it with a second select(); row presence already encodes
# implementation, so the two key columns are selected directly.
SPA_level_TF <- truef_spa %>%
  select(c("Country", "BenchmarkID")) %>%
  distinct() %>%
  # Wide format: each cell holds the BenchmarkID itself when present, NA otherwise.
  pivot_wider(names_from = "BenchmarkID", values_from = "BenchmarkID") %>%
  ungroup() %>%
  # Convert presence/absence to 1/0 (doubles; cast to integer downstream).
  mutate(
    across(.cols = -Country,
           ~if_else(!is.na(.), 1, 0)
    )
  )
# Lookup tables for the aggregation hierarchy.
# Full hierarchy: sub-policy area / benchmark -> policy area.
AggHiearchy <- distinct(policydb, SubPArea, BenchmarkID, PolicyArea, PolicyAreaID)
# List of policy areas.
PAreas <- distinct(policydb, PolicyArea, PolicyAreaID)
# List of sub-policy areas (benchmarks).
SubPas <- distinct(policydb, SubPArea, BenchmarkID)
Let’s translate this into the COINr toolkit.
## Indicator data
# COINr expects a UnitName column plus an ISO3 UnitCode for each unit.
COINr_SPA <- SPA_level_TF %>%
  rename(UnitName = Country) %>%
  mutate(
    UnitCode = countrycode(
      sourcevar = UnitName,
      origin = "country.name",
      destination = "iso3c"
    )
  )
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: Gulf Cooperation Council, Micronesia, Romania, Wallis and Futuna
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some strings were matched more than once, and therefore set to <NA> in the result: Romania, Wallis and Futuna,ROU,WLF
# Countries countrycode could not match to ISO3 (see warnings above):
# Gulf Cooperation Council, Micronesia, Romania, Wallis and Futuna.
Missing <- COINr_SPA %>% filter(is.na(UnitCode)) %>% select(UnitName, UnitCode)
# For this exercise the unmatched units are dropped.
# (na.omit removes rows with NA in any column; only UnitCode can be NA here,
# since the indicator columns are all 0/1.)
COINr_SPA <- na.omit(COINr_SPA)
# ISO3 codes of the CO-CREATE project countries.
isCoCREATE <- c("NOR", "NLD", "PRT", "POL", "GBR")
# Add grouping variables; COINr picks up columns prefixed "Group_"
# (cf. "Groups: 4 (...)" in the COIN summary print-out below).
COINr_SPA <- COINr_SPA %>% mutate(
  Group_Continent = countrycode(sourcevar = UnitName, origin="country.name", destination="continent"),
  Group_EU28 = countrycode(sourcevar = UnitName, origin="country.name", destination="eu28"),
  Group_Region = countrycode(sourcevar = UnitName, origin="country.name", destination="region"),
  # %in% already returns TRUE/FALSE and never NA; no case_when/unlist needed.
  Group_COCREATE = UnitCode %in% isCoCREATE)
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: St Helena
#Consider adding certain variables etc for further analysis (eurostat stuff)
# Cast the 0/1 double indicator columns to integer.
# mutate_if() is superseded in dplyr; across(where(...)) is the current idiom.
IndData <- COINr_SPA %>%
  mutate(across(where(is.double), as.integer))
IndData
## # A tibble: 133 x 73
## UnitName `I(2)1` H7 `I(2)3` I1 I3 I4 `I(2)4` N2 G1 G2
## <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 Afghanistan 1 0 0 0 0 0 0 0 0 0
## 2 Albania 1 0 0 0 0 0 0 0 0 0
## 3 American S~ 0 1 0 0 0 0 0 0 0 0
## 4 Antigua an~ 1 1 1 0 0 0 0 0 0 0
## 5 Argentina 1 0 0 1 1 1 1 1 0 0
## 6 Australia 1 0 1 1 0 0 1 0 1 1
## 7 Austria 1 0 0 1 0 1 0 0 0 1
## 8 Bahamas 1 1 1 0 0 0 0 0 0 0
## 9 Bahrain 0 0 0 0 0 0 0 0 0 0
## 10 Bangladesh 1 0 0 0 0 0 0 0 0 0
## # ... with 123 more rows, and 62 more variables: N1 <int>, N3 <int>, N5 <int>,
## # N7 <int>, N8 <int>, O1 <int>, O2 <int>, O4 <int>, O5 <int>, O7 <int>,
## # R4 <int>, G7 <int>, S4 <int>, U1 <int>, G3 <int>, I(2)6 <int>, R10 <int>,
## # S2 <int>, G4 <int>, O6 <int>, H2 <int>, H3 <int>, H5 <int>, N(2)2 <int>,
## # N(2)3 <int>, R3 <int>, R5 <int>, S3 <int>, U5 <int>, R1 <int>, R2 <int>,
## # R6 <int>, I(2)2 <int>, U3 <int>, O3 <int>, R9 <int>, I(2)5 <int>, I5 <int>,
## # N4 <int>, U4 <int>, R7 <int>, S5 <int>, R8 <int>, I6 <int>, N6 <int>, ...
# Indicator metadata for COINr: name, code, direction, weight and parents.
IndMeta <- AggHiearchy
# NOTE(review): positional rename — this assumes AggHiearchy's column order is
# (SubPArea, BenchmarkID, PolicyArea, PolicyAreaID); verify that each original
# column really maps to the intended metadata field.
colnames(IndMeta) <- c("PolicyArea", "IndName", "IndCode", "Agg_PolicyDimension")
IndMeta$Direction <- 1 #All indicators got positive direction
IndMeta$IndWeight <- 1 #All indicators carry the same weight
#IndMeta$Agg1_Indicators <- IndMeta$IndCode ##Holding off on this one; would be duplicated anyway?
#IndMeta$Agg_PolicyDimension #Collected into one of the three main categories
#IndMeta$Agg_PolicyIndex #Everything is collected
# Top aggregation level: every indicator rolls up into the NOURISHING index.
IndMeta$Agg_PolicyIndex <- "NOURISHING"
# COINr expects aggregation columns ordered from lower to higher level.
IndMeta <- relocate(IndMeta, Agg_PolicyDimension, .before = Agg_PolicyIndex)
IndMeta
## # A tibble: 67 x 7
## PolicyArea IndName IndCode Direction IndWeight Agg_PolicyDimen~
## <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 Nutrition label ~ "Mandatory nu~ N1 1 1 N
## 2 Nutrition label ~ "Trans fats i~ N2 1 1 N
## 3 Nutrition label ~ "Clearly visi~ N3 1 1 N
## 4 Nutrition label ~ "On-shelf lab~ N4 1 1 N
## 5 Nutrition label ~ "Calorie and ~ N5 1 1 N
## 6 Nutrition label ~ "Warning labe~ N6 1 1 N
## 7 Nutrition label ~ "Rules on nut~ N7 1 1 N
## 8 Nutrition label ~ "Rules on hea~ N8 1 1 N
## 9 Offer healthy fo~ "Fruit & vege~ O1 1 1 O
## 10 Offer healthy fo~ "Mandatory st~ O2 1 1 O
## # ... with 57 more rows, and 1 more variable: Agg_PolicyIndex <chr>
# Aggregation metadata: one row per policy area (level 2) plus the
# top-level index row (level 3). All weights are equal.
AggMeta <- PAreas %>%
  rename(Name = PolicyArea, Code = PolicyAreaID) %>%
  mutate(AgLevel = 2, Weight = 1) %>%
  add_row(Name = "Index", Code = "NOURISHING", AgLevel = 3, Weight = 1)
AggMeta
## # A tibble: 11 x 4
## Name Code AgLevel Weight
## <chr> <chr> <dbl> <dbl>
## 1 Nutrition label standards and regulations on the use~ N 2 1
## 2 Offer healthy food and set standards in public insti~ O 2 1
## 3 Use economic tools to address food affordability and~ U 2 1
## 4 Restrict food advertising and other forms of commerc~ R 2 1
## 5 Improve nutritional quality of the whole food supply I 2 1
## 6 Set incentives and rules to create a healthy retail ~ S 2 1
## 7 Harness supply chain and actions across sectors to e~ H 2 1
## 8 Inform people about food and nutrition through publi~ I(2) 2 1
## 9 Nutrition advice and counselling in healthcare setti~ N(2) 2 1
## 10 Give nutrition education and skills G 2 1
## 11 Index NOURISH~ 3 1
Combine the IndData, IndMeta and AggMeta into one hierarchical list.
IndData
## # A tibble: 133 x 73
## UnitName `I(2)1` H7 `I(2)3` I1 I3 I4 `I(2)4` N2 G1 G2
## <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 Afghanistan 1 0 0 0 0 0 0 0 0 0
## 2 Albania 1 0 0 0 0 0 0 0 0 0
## 3 American S~ 0 1 0 0 0 0 0 0 0 0
## 4 Antigua an~ 1 1 1 0 0 0 0 0 0 0
## 5 Argentina 1 0 0 1 1 1 1 1 0 0
## 6 Australia 1 0 1 1 0 0 1 0 1 1
## 7 Austria 1 0 0 1 0 1 0 0 0 1
## 8 Bahamas 1 1 1 0 0 0 0 0 0 0
## 9 Bahrain 0 0 0 0 0 0 0 0 0 0
## 10 Bangladesh 1 0 0 0 0 0 0 0 0 0
## # ... with 123 more rows, and 62 more variables: N1 <int>, N3 <int>, N5 <int>,
## # N7 <int>, N8 <int>, O1 <int>, O2 <int>, O4 <int>, O5 <int>, O7 <int>,
## # R4 <int>, G7 <int>, S4 <int>, U1 <int>, G3 <int>, I(2)6 <int>, R10 <int>,
## # S2 <int>, G4 <int>, O6 <int>, H2 <int>, H3 <int>, H5 <int>, N(2)2 <int>,
## # N(2)3 <int>, R3 <int>, R5 <int>, S3 <int>, U5 <int>, R1 <int>, R2 <int>,
## # R6 <int>, I(2)2 <int>, U3 <int>, O3 <int>, R9 <int>, I(2)5 <int>, I5 <int>,
## # N4 <int>, U4 <int>, R7 <int>, S5 <int>, R8 <int>, I6 <int>, N6 <int>, ...
IndMeta
## # A tibble: 67 x 7
## PolicyArea IndName IndCode Direction IndWeight Agg_PolicyDimen~
## <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 Nutrition label ~ "Mandatory nu~ N1 1 1 N
## 2 Nutrition label ~ "Trans fats i~ N2 1 1 N
## 3 Nutrition label ~ "Clearly visi~ N3 1 1 N
## 4 Nutrition label ~ "On-shelf lab~ N4 1 1 N
## 5 Nutrition label ~ "Calorie and ~ N5 1 1 N
## 6 Nutrition label ~ "Warning labe~ N6 1 1 N
## 7 Nutrition label ~ "Rules on nut~ N7 1 1 N
## 8 Nutrition label ~ "Rules on hea~ N8 1 1 N
## 9 Offer healthy fo~ "Fruit & vege~ O1 1 1 O
## 10 Offer healthy fo~ "Mandatory st~ O2 1 1 O
## # ... with 57 more rows, and 1 more variable: Agg_PolicyIndex <chr>
# Assemble data + metadata into a COIN object (COINr's central structure).
NPI <- assemble(IndData = IndData, IndMeta = IndMeta, AggMeta = AggMeta)
## -----------------
## No denominators detected.
## -----------------
## -----------------
## Indicator codes cross-checked and OK.
## -----------------
## Number of indicators = 67
## Number of units = 133
## Number of aggregation levels = 2 above indicator level.
## -----------------
## Aggregation level 1 with 10 aggregate groups: N, O, U, R, I, S, H, I(2), N(2), G
## Cross-check between metadata and framework = OK.
## Aggregation level 2 with 1 aggregate groups: NOURISHING
## Cross-check between metadata and framework = OK.
## -----------------
NPI
## --------------
## A COIN with...
## --------------
## Input:
## Units: 133 (AFG, ALB, ASM, ...)
## Indicators: 67 (G1, G2, G3, ...)
## Denominators: 0 (none)
## Groups: 4 (Group_Continent, Group_EU28, Group_Region, ...)
##
## Structure:
## Level 1: 67 indicators (G1, G2, G3, ...)
## Level 2: 10 groups (G, H, I, ...)
## Level 3: 1 groups (NOURISHING)
##
## Data sets:
## Raw (133 units)
The sunburst plot shows the hierarchical structure and effective weights for the simplified NOURISHING index. Effective weights reflect each element's relative contribution to the aggregated level, even with equal weighting applied. This is most apparent at the indicator level: N (level 2) has eight indicators contributing 0.0125 each, while each indicator in N(2) contributes 0.033. At the sub-index level (N, O, U, R, I, S, H, I(2), N(2), G) they contribute equally at 0.1 each.
# Sunburst plot of the index framework and effective weights.
framework <- plotframework(NPI)
framework
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
An example of countries implementing N1: Mandatory nutrient lists on packaged food.
##Statistics and analysis There are collinear indicators and, even worse, significant negative indicator correlations. The latter is undesirable given the nature of aggregation: the indicators should contribute positively to the aggregate, not the other way around.
The table (sorted by MEAN) indicates which areas have the best coverage: I(2)1, N1 and N7, respectively. Multiple variables have low coverage, e.g. G5, H4 and R8.
# Attach indicator statistics (collinearity, negative correlations, etc.).
NPI <- getStats(NPI)
## Number of collinear indicators = 4
## Number of signficant negative indicator correlations = 6
# Interactive, sortable statistics table (values rounded for readability).
NPI$Analysis$Raw$StatTable %>% roundDF() %>% reactable::reactable(resizable = TRUE, bordered = TRUE, highlight = TRUE, defaultPageSize = 10)
#Multivariate analysis ##Correlations Assessing correlations may be of limited value here, since high correlations are bound to happen with binary variables.
# Same statistics as above, but returned as a plain list for direct access.
statlist <- getStats(NPI, dset = "Raw", out2="list")
## Number of collinear indicators = 4
## Number of signficant negative indicator correlations = 6
#Example of G5 being negatively correlated with all other pillars.
statlist$Correlations[1:5, 1:5]
## IndCode G1 G2 G3 G4
## 1 G1 NA 0.38913667 0.3139536 0.11125541
## 2 G2 0.38913667 NA 0.2298933 0.08566778
## 3 G3 0.31395363 0.22989328 NA 0.12729659
## 4 G4 0.11125541 0.08566778 0.1272966 NA
## 5 G5 -0.03661751 -0.02344895 -0.0189185 -0.01891850
# Per-indicator overview: collinearity flag and count of negative correlations.
CorellOverview <- statlist$StatTable[ c("Indicator", "Collinearity", "Neg.Correls")]
CorellOverview
## # A tibble: 67 x 3
## Indicator Collinearity Neg.Correls
## <chr> <chr> <dbl>
## 1 G1 OK 0
## 2 G2 OK 0
## 3 G3 OK 0
## 4 G4 OK 0
## 5 G5 Collinear 0
## 6 G6 OK 0
## 7 G7 OK 0
## 8 H1 OK 0
## 9 H2 OK 0
## 10 H3 OK 0
## # ... with 57 more rows
We can also plot maps of correlations. The first plots indicators (benchmarks) against indicators within each sub-policy area (e.g. N or O); grey marks insignificant correlations. Again, given the binary data, high correlations are likely.
This plot shows everything:
While the indicator level may not be that interesting, the aggregation levels can yield more information. For a sensible aggregation, the levels should point in the same direction AND have a correlation of more than 0.3. Several of the correlations are below 0.3 (see the grey zones).
CronAgg displays a high consistency of the sub-pillars within NOURISHING:
#The consistency of subpillars within NOURISHING: high reliability.
# Cronbach's alpha computed across the ten aggregated pillar scores (level 2).
CronAgg <- getCronbach(NPI, dset="Aggregated", icodes="NOURISHING", aglev=2)
CronAgg
## [1] 0.8721922
A principal component analysis can be used both for dimensionality reduction and for exploratory analysis. The following analyses show that the ten pillars of NOURISHING translate into ten principal components, as listed in the table below.
Using the ten pillars, the PCA shows that the first principal component explains about 50% of the variance in NOURISHING. Is this enough? I am not sure, but we want our components to explain a substantial proportion at least.
N.B: PCA assumes linearity. The following is used mostly for exploratory analysis.
# Aggregate with COINr defaults so an "Aggregated" dset exists for the PCA.
# NOTE(review): aggregate() here is COINr's method (not stats::aggregate);
# the data are re-aggregated with explicit settings further below.
NPI <- aggregate(NPI, dset = "Raw")
# PCA on the ten pillar scores (aggregation level 2), returned as a list.
PCAres <- getPCA(NPI, dset="Aggregated", aglev=2, out2="list")
summary(PCAres$PCAresults$NOURISHING$PCAres)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.2345 1.1186 0.90762 0.83047 0.77121 0.69895 0.63438
## Proportion of Variance 0.4993 0.1251 0.08238 0.06897 0.05948 0.04885 0.04024
## Cumulative Proportion 0.4993 0.6244 0.70679 0.77576 0.83523 0.88409 0.92433
## PC8 PC9 PC10
## Standard deviation 0.59064 0.49056 0.40887
## Proportion of Variance 0.03489 0.02407 0.01672
## Cumulative Proportion 0.95922 0.98328 1.00000
The ggbiplot package can tell us more.
Three groupings are possible per now. Later, I want to include obesity rates (low/med/high).
First, EU countries versus others. GBR and the US explain a lot of the variance. There is a tendency for European countries to do slightly better than non-European countries.
#Possible groupings for the future: OBESITY rate (low/med/high)
#Currently, choose between CO-CREATE countries, regions etc
# PCA biplot of the pillar scores, labelled by ISO3 code, grouped by EU28.
ggbiplot(PCAres$PCAresults$NOURISHING$PCAres,
labels = NPI$Data$Aggregated$UnitCode,
groups = NPI$Data$Aggregated$Group_EU28)
The second plot indicates that CO-CREATE countries do slightly better than both EU and non-European countries in general, with the UK being the key driver among all countries.
#Possible groupings for the future: OBESITY rate (low/med/high)
#Currently, choose between CO-CREATE countries, regions etc
# Same biplot, but grouped by CO-CREATE membership.
ggbiplot(PCAres$PCAresults$NOURISHING$PCAres,
labels = NPI$Data$Aggregated$UnitCode,
groups = NPI$Data$Aggregated$Group_COCREATE)
#Normalisation Normalisation is not necessary if it is all on same scale, this is just to simplify the output. Let’s try for fun.
We can consider the “minmax”, “rank”, “prank” (percentile score).
# Rank-normalise the raw 0/1 data (ntype options: "minmax", "rank", "prank").
NPI <- normalise(NPI, dset="Raw", ntype="rank")
##iplotIndDist2(NPI, dsets = c("Raw","Normalised"),
# icodes = "N1", ptype = "Histogram")
#Aggregation I will try different aggregation methods. We do not want total compensability because being good in one area should not compensate for implementing less in another area.
Possible methods: the Copeland method ("copeland") and the arithmetic mean ("arith_mean"). Geometric and harmonic means won't work due to the zeroes, and the median ("median") would not be very informative here.
# Aggregate indicators -> pillars -> index, with a per-level method spec.
# NOTE(review): the rendered scores below look like arithmetic means at every
# level (e.g. NOURISHING = mean of the ten pillars, never zero), which suggests
# agtype_bylevel did not take effect — check whether this COINr version
# requires agtype = "mixed" for agtype_bylevel to be used.
NPI <- aggregate(NPI, dset = "Raw", agtype_bylevel = c("arith_mean", "geom_mean"))
# Show only the aggregated (pillar + index) columns after aggregation.
NPI$Data$Aggregated[(ncol(NPI$Data$Aggregated)-10): ncol(NPI$Data$Aggregated)]
## # A tibble: 133 x 11
## G H I `I(2)` N `N(2)` O R S U NOURISHING
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 0 0 0.167 0 0 0 0 0 0 0.0167
## 2 0 0 0 0.167 0 0 0 0 0 0 0.0167
## 3 0 0.143 0 0 0 0 0 0 0 0 0.0143
## 4 0 0.143 0 0.333 0 0 0 0 0 0 0.0476
## 5 0 0 0.5 0.333 0.125 0 0 0 0 0 0.0958
## 6 0.286 0 0.167 0.5 0.625 0 0.714 0.1 0 0 0.239
## 7 0.286 0 0.333 0.167 0.375 0 0.429 0 0 0 0.159
## 8 0 0.143 0 0.333 0.125 0 0 0 0 0 0.0601
## 9 0 0 0 0 0.125 0 0.143 0 0.125 0.2 0.0593
## 10 0 0 0 0.167 0 0 0 0 0 0 0.0167
## # ... with 123 more rows
##Results After managing the data, let’s have a look at the output.
###Tables
#Rearrange data, output = NULL
# Store the results tables back inside the COIN object.
NPI <- getResults(NPI, tab_type = "Summary", out2="COIN")
# NOTE(review): this prints NULL (see output below) — the summary table is
# probably stored under a different element name; inspect names(NPI$Results).
NPI$Results$SummaryScores %>% head(10)
## NULL
Here are the top ten performers: the UK, followed by the USA and Norway. There is quite a lot of variation among the top 15 or so countries, before it evens out further down.
# Interactive table of all aggregated scores (pillars + index).
getResults(NPI, tab_type = "Aggregates") %>%
reactable::reactable()
Let’s dive into the sub-policy areas:
# Interactive table of the sub-policy-area (level 2) scores.
iplotTable(NPI, dset="Aggregated", aglev = 2)
Maybe we want to find the strengths and weaknesses of a particular country?
#future
The full index score. Highlighted countries are CO-CREATE.
# Bar chart of the full index score (level 3), CO-CREATE countries highlighted.
iplotBar(NPI, dset="Aggregated", usel=isCoCREATE, aglev = 3)
# Choropleth map of the NOURISHING index score.
iplotMap(NPI, dset="Aggregated", isel="NOURISHING")
Let’s continue with radar charts. Click to select different countries. Note how all CO-CREATE countries score weakly in R, S and U, but strongly in N(2), I(2) and O.
We should perform PCA/grouping analyses to see whether the different dimensions are covered by countries or not.
Note how low the median is across all countries. We should probably limit ourselves to Europe.
#GROUP: EU28, COCREATE, REGION
# Radar chart of pillar scores for CO-CREATE countries, with group mean overlay.
iplotRadar(NPI, dset ="Aggregated", usel = isCoCREATE, aglev=2, addstat="groupmean", statgroup = "Group_COCREATE")
Finally, let’s try an interactive chart.
#resultsDash(NPI)
Aaand, make a report for one country:
#getUnitReport(NPI, usel="NOR", out_type = ".pdf")
#Analysing results
Get some data
# NOTE(review): stringr is already attached via tidyverse above; this is redundant.
library(stringr)
# Eurostat: BMI distribution by education level (EHIS survey).
hlth_ehis_bm1e <- eurostat::get_eurostat("hlth_ehis_bm1e")
## Table hlth_ehis_bm1e cached at C:\Users\JRMA\AppData\Local\Temp\Rtmp2FYcW1/eurostat/hlth_ehis_bm1e_date_code_FF.rds
# Available BMI categories in the dataset.
BMI_cat <- hlth_ehis_bm1e %>% select(bmi) %>% distinct()
# Extract the year from the time column (first four characters).
hlth_ehis_bm1e$year <- str_sub(hlth_ehis_bm1e$time,1,4)
# Obesity prevalence (BMI >= 30), total population, 2019, per country.
BMI_euro <- hlth_ehis_bm1e %>% filter(sex=="T" & age=="TOTAL" & isced11=="TOTAL" & bmi=="BMI_GE30" & year=="2019") %>% select(geo, values)
# Convert Eurostat geo codes to ISO3 for joining with the index results.
BMI_euro <- BMI_euro %>% mutate(geo2= countrycode(sourcevar = geo, origin="eurostat", destination="iso3c")) %>% select(geo2, values)
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: EU27_2020
head(BMI_euro)
## # A tibble: 6 x 2
## geo2 values
## <chr> <dbl>
## 1 AUT 16.7
## 2 BEL 15.9
## 3 BGR 13.2
## 4 CYP 14.6
## 5 CZE 19.3
## 6 DEU 18.5
BMI_euro
## # A tibble: 32 x 2
## geo2 values
## <chr> <dbl>
## 1 AUT 16.7
## 2 BEL 15.9
## 3 BGR 13.2
## 4 CYP 14.6
## 5 CZE 19.3
## 6 DEU 18.5
## 7 DNK 15.9
## 8 EST 21.1
## 9 GRC 16.2
## 10 ESP 15.4
## # ... with 22 more rows
Combine
# Index score per country joined with the Eurostat obesity prevalence.
# NOTE(review): the original used tab_type = "Summ", relying on partial
# matching of "Summary"; spelled out here for safety.
IndexScore_geo <- getResults(NPI, tab_type = "Summary") %>% select(UnitCode, NOURISHING)
IndexScore_geo <- left_join(IndexScore_geo, BMI_euro, by = c("UnitCode" ="geo2"))
#Add some missing data for this exercise
# Consolidated from three repeated mutate(replace(...)) calls; behaviour is
# identical (each listed country gets its hand-entered obesity rate).
IndexScore_geo <- IndexScore_geo %>%
  mutate(values = case_when(
    UnitCode == "USA" ~ 36,
    UnitCode == "GBR" ~ 28,
    UnitCode == "MYS" ~ 19.7,
    TRUE ~ values
  ))
IndexScore_geo
## UnitCode NOURISHING values
## 1 GBR 0.60 28.0
## 2 USA 0.48 36.0
## 3 NOR 0.40 13.8
## 4 PRT 0.39 17.2
## 5 MYS 0.33 19.7
## 6 NLD 0.31 14.1
## 7 ESP 0.27 15.4
## 8 BEL 0.25 15.9
## 9 BRA 0.25 NA
## 10 DEU 0.24 18.5
## 11 POL 0.24 18.5
## 12 AUS 0.24 NA
## 13 HRV 0.24 22.6
## 14 SGP 0.21 NA
## 15 MEX 0.21 NA
## 16 ZAF 0.21 NA
## 17 FRA 0.20 14.4
## 18 CHL 0.20 NA
## 19 DNK 0.20 15.9
## 20 FIN 0.20 20.3
## 21 THA 0.19 NA
## 22 CAN 0.19 NA
## 23 SWE 0.18 14.7
## 24 HUN 0.18 23.9
## 25 LVA 0.17 22.3
## 26 FJI 0.17 NA
## 27 EST 0.16 21.1
## 28 AUT 0.16 16.7
## 29 KOR 0.15 NA
## 30 TON 0.14 NA
## 31 ROU 0.13 10.5
## 32 BGR 0.13 13.2
## 33 SVN 0.12 19.4
## 34 IRL 0.12 25.8
## 35 ECU 0.12 NA
## 36 NZL 0.10 NA
## 37 ARG 0.10 NA
## 38 CRI 0.10 NA
## 39 LTU 0.09 18.3
## 40 PER 0.09 NA
## 41 CHE 0.09 NA
## 42 IRN 0.09 NA
## 43 GRC 0.09 16.2
## 44 MLT 0.08 28.1
## 45 URY 0.08 NA
## 46 WSM 0.07 NA
## 47 ITA 0.07 11.4
## 48 CYP 0.07 14.6
## 49 SLV 0.07 NA
## 50 GTM 0.07 NA
## 51 BRB 0.07 NA
## 52 DMA 0.07 NA
## 53 CZE 0.07 19.3
## 54 ISL 0.07 21.7
## 55 IND 0.07 NA
## 56 BMU 0.06 NA
## 57 BLZ 0.06 NA
## 58 PHL 0.06 NA
## 59 BHS 0.06 NA
## 60 BHR 0.06 NA
## 61 PRY 0.06 NA
## 62 ISR 0.06 NA
## 63 PYF 0.05 NA
## 64 HND 0.05 NA
## 65 IDN 0.05 NA
## 66 VCT 0.05 NA
## 67 LUX 0.05 16.1
## 68 SVK 0.05 19.3
## 69 NRU 0.05 NA
## 70 VUT 0.05 NA
## 71 ATG 0.05 NA
## 72 GRD 0.05 NA
## 73 TWN 0.05 NA
## 74 ARE 0.05 NA
## 75 TUR 0.05 21.1
## 76 CHN 0.05 NA
## 77 COL 0.05 NA
## 78 VEN 0.05 NA
## 79 HKG 0.04 NA
## 80 JPN 0.04 NA
## 81 LIE 0.04 NA
## 82 NIC 0.04 NA
## 83 SYC 0.04 NA
## 84 COK 0.03 NA
## 85 KIR 0.03 NA
## 86 MUS 0.03 NA
## 87 PLW 0.03 NA
## 88 GUY 0.03 NA
## 89 JAM 0.03 NA
## 90 KNA 0.03 NA
## 91 BRN 0.03 NA
## 92 KWT 0.03 NA
## 93 MKD 0.03 NA
## 94 NCL 0.03 NA
## 95 TTO 0.03 NA
## 96 VNM 0.03 NA
## 97 NGA 0.03 NA
## 98 LKA 0.03 NA
## 99 GUM 0.03 NA
## 100 SLB 0.03 NA
## 101 ETH 0.02 NA
## 102 MAR 0.02 NA
## 103 SAU 0.02 NA
## 104 SHN 0.02 NA
## 105 AFG 0.02 NA
## 106 ALB 0.02 NA
## 107 BGD 0.02 NA
## 108 BEN 0.02 NA
## 109 BIH 0.02 NA
## 110 KHM 0.02 NA
## 111 CUB 0.02 NA
## 112 DOM 0.02 NA
## 113 GEO 0.02 NA
## 114 GHA 0.02 NA
## 115 KEN 0.02 NA
## 116 LBN 0.02 NA
## 117 MNG 0.02 NA
## 118 NAM 0.02 NA
## 119 NPL 0.02 NA
## 120 OMN 0.02 NA
## 121 PAN 0.02 NA
## 122 QAT 0.02 NA
## 123 SLE 0.02 NA
## 124 LCA 0.02 NA
## 125 ASM 0.01 NA
## 126 BTN 0.01 NA
## 127 VGB 0.01 NA
## 128 JOR 0.01 NA
## 129 MHL 0.01 NA
## 130 TZA 0.01 NA
## 131 UGA 0.01 NA
## 132 ZWE 0.01 NA
## 133 RUS 0.01 NA
Fitting a line suggests a weak positive association between higher implementation of nutrition policies and obesity rates (r ≈ 0.32), although it does not reach statistical significance at the 5% level (p ≈ 0.07).
# Linear model of index score on obesity rate (complete cases only);
# keep the fitted values for the trend line in the plot below.
regline <- IndexScore_geo %>% filter(!is.na(values)) %>% lm(NOURISHING ~ values,.) %>% fitted.values()
print("Fitted line")
## [1] "Fitted line"
mean(regline)
## [1] 0.1936364
# Pearson correlation; df = 31 in the output shows that NA pairs were dropped
# (33 complete pairs). r = 0.32, p = 0.073 — not significant at the 5% level.
corell <- cor.test(IndexScore_geo$NOURISHING, IndexScore_geo$values, method = "pearson")
corell
corell
##
## Pearson's product-moment correlation
##
## data: IndexScore_geo$NOURISHING and IndexScore_geo$values
## t = 1.8585, df = 31, p-value = 0.07261
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.02993811 0.59523226
## sample estimates:
## cor
## 0.3166252
Create a scatterplot with NOURISHING score (y) and obesity rate (total population):
# Mean index score across ALL countries (including those without obesity data),
# drawn as a horizontal reference line.
meanNourish <- mean(IndexScore_geo$NOURISHING)
# NOTE(review): `plot` shadows base::plot for the rest of the session.
# NOTE(review): plot_ly(mode="markers") plus add_markers() appears to draw the
# marker trace twice — verify whether one of the two is redundant.
plot <- IndexScore_geo %>% filter(!is.na(values)) %>%
plotly::plot_ly(x=~values, y= ~NOURISHING, mode="markers", text =~UnitCode) %>%
plotly::add_markers(y= ~NOURISHING) %>%
plotly::add_trace(x=~values, y=regline, mode="lines") %>%
plotly::add_lines(y=meanNourish) %>%
plotly::layout(showlegend=F) %>%
plotly::add_text(textposition="top right")
plot
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
#Sensitivity analyses